Mac Mania 2

home *** CD-ROM | disk | FTP | other *** search

/ Mac Mania 2 / MacMania 2.toast / Demo's / Tools&Utilities / Programming / Programmers' Challenge / SwapBlocks.c < prev

Wrap

C/C++ Source or Header | 1994-04-12 | 11.5 KB | 375 lines | [TEXT/KAHL]

/* SwapBlocks --------------------------------------------- * * Response to Apr 94 MacTech Programmer's Challenge. * * Object: * Exchange contents of two adjacent memory blocks. * * Redirection: * This is an interesting problem, but what would make * this guy really useful? As stated, the blocks for * the challenge are 4i bytes long and start on 4j * aligned addresses. These are special circumstances * which apply to Memory Manager blocks, and then, only * on 68020 or later cpu's. Memory blocks on the 68000 * are merely even aligned and even length. Further, * this could be a word processor tool for swapping runs * of bytes, but we would have to relax the alignment * and size restrictions even further to arbitrary address * and length since we would almost always be pointing * to characters interior to a handle. * * I have written the routine to give its best * performance, subject to a specified minimum enforced * alignment and atom size (smallest unit to move). * This is controlled at compile time by: * * typedef long Atom, for len = 4i, addr = 4j, * typedef short Atom, for len = 2i, addr = 2j, * typedef Byte Atom, for len = any, addr = any. * * Note- due to an ancient law of portability, preprocessor * directives are not allowed to compare enums, types, * sizeof()s or anything else that has machine dependency * hidden in it. This means you have to #define the * AtomSize manually. This is needed to select the proper * performance crossover points for that type. * * But wait there's more... * You might not tolerate a 644 byte dedicated word * swapper in your text editor, but a 96 byte one * might fit. We handle that. * * You can tailor the routine to your requirements * for execution speed vs. code size by setting the * JobMode constant according to table: * * JobMode Buffers MonsterCopies MonsterSwaps * ---------------------------------------------- * Smallest | No No No * Small | No No Yes * Fast | Yes No Yes * Fastest | Yes Yes Yes * * billKarsh * */ #pragma options( honor_register, !assign_registers ) /* ---------------------------------------------------- */ #define Smallest 0 #define Small 1 #define Fast 2 #define Fastest 3 /* ---------------------------------------------------- */ /* ---------------------------- */ /* User Selectable Parameters /* ---------------------------- */ #define JobMode Fastest #define Verify_p1_LowerThan_p2 0 /* Sorry, you must #define your chosen Atom's size by hand. The preprocessor won't accept sizeof operators. Yuck! The XOvers below vary according to this size, so we have to know it. */ typedef long Atom; #define AtomSize 4 /* ---------------------------------------------------- */ #if JobMode >= Fast # define UseBuffer 1 #endif #if JobMode == Fastest # define MonsterCopy 1 #endif #if JobMode >= Small # define MonsterSwap 1 #endif /* ---------------------------------------------------- */ #define Lo3B 0x00ffffff /* ---------------------------------------------------- */ #if AtomSize == 4 # define FwdXOver 144 # define BckXOver 120 # define SwpXOver 44 #elif AtomSize == 2 # define FwdXOver 48 # define BckXOver 44 # define SwpXOver 32 #else # define FwdXOver 24 # define BckXOver 20 # define SwpXOver 12 #endif /* ---------------------------------------------------- */ #define FwdOp \ *dst++ = *src++ /* ---------------------------------------------------- */ #define BckOp \ *--pR = *--pL /* ---------------------------------------------------- */ #define SwpOp \ q = *pL; \ *pL++ = *pR; \ *pR++ = q /* ---------------------------------------------------- */ #define Cases3_1( op ) \ case 3: op; \ case 2: op; \ case 1: op /* ---------------------------------------------------- */ #define Cases7_1( op ) \ case 7: op; \ case 6: op; \ case 5: op; \ case 4: op; \ Cases3_1( op ) /* ---------------------------------------------------- */ #define CalcPasses( bits ) \ nS /= sizeof(Atom); \ q = nS & ((1 << bits) - 1); \ nS >>= bits; \ ++nS /* ---------------------------------------------------- */ #define Monster( op, cases ) \ switch( (short)q ) { \ case 0: \ while( --nS ) { \ op; \ cases( op ); \ } \ } /* ---------------------------------------------------- */ #if MonsterCopy == 1 #define CopyInc( dst, src, n ) \ nS = n; \ if( nS > FwdXOver ) { \ _CopyInc( \ (Atom*)(dst), (Atom*)(src), nS ); \ } \ else { \ pL = (Atom*)(dst); \ pR = (Atom*)(src); \ do { *pL++ = *pR++; } while(nS-=sizeof(Atom)); \ } #else #define CopyInc( dst, src, n ) \ nS = n; \ pL = (Atom*)(dst); \ pR = (Atom*)(src); \ do { *pL++ = *pR++; } while(nS-=sizeof(Atom)) #endif /* ---------------------------------------------------- */ #if MonsterCopy == 1 #define CopyDec( dst, src, n ) \ nS = n; \ pR = (Atom*)((Byte*)(dst) + nS); \ pL = (Atom*)((Byte*)(src) + nS); \ if( nS > BckXOver ) { \ CalcPasses( 2 ); \ Monster( BckOp, Cases3_1 ); \ } \ else { \ do { BckOp; } while(nS-=sizeof(Atom)); \ } #else #define CopyDec( dst, src, n ) \ nS = n; \ pR = (Atom*)((Byte*)(dst) + nS); \ pL = (Atom*)((Byte*)(src) + nS); \ do { BckOp; } while(nS-=sizeof(Atom)) #endif /* ---------------------------------------------------- */ #if MonsterSwap == 1 #define Swap \ if( nS > SwpXOver ) { \ CalcPasses( 3 ); \ Monster( SwpOp, Cases7_1 ); \ } \ else { \ do { SwpOp; } while(nS-=sizeof(Atom)); \ } #else #define Swap \ do { SwpOp; } while(nS-=sizeof(Atom)) #endif /* ---------------------------------------------------- */ #define MacroMania true /* ---------------------------------------------------- */ #if JobMode == Fastest /* _CopyInc ----------------------------------------------- * * Copy specified number of Bytes from src to dst. * * Addresses are incremented, so src and dst can overlap * iff dst <= src. * */ static void _CopyInc( register Atom *dst, register const Atom *src, register unsigned long nS ) { short q, pad; CalcPasses( 3 ); Monster( FwdOp, Cases7_1 ); } #endif /* SwapBlocks --------------------------------------------- * */ void SwapBlocks( void *p1, void *p2, void *swapPtr, unsigned long size1, unsigned long size2, unsigned long swapSize ) { register Atom *pL, *pR, *p0; register long nL, nR, nS, q; Boolean done; short pad; if( !(nL = size1) || !(nR = size2) ) return; p0 = p1; /* -------------------------------------------------------- If you can safely assume that p1 is always lower or same as p2, define Verify_p1_LowerThan_p2 = 0. (the #if section is not necessary). If "1" and "2" are simply labels, indicating nothing about position in memory of the blocks, then you must order them by activating the #if section. Define Verify_p1_LowerThan_p2 = 1. Ordering means comparing addresses, which treats them as 32-bit numbers, NO MATTER the current cpu addressing mode. If GetMMUMode returns true, we are in 32-bit mode - all 32-bits are significant. In 24-bit mode, when the cpu uses an address to load or store something, it totally ignores the high-byte of the address. The high-byte may be random garbage. In this mode we suppress any garbage before comparing by masking it to zero. */ #if Verify_p1_LowerThan_p2 == 1 pR = p2; if( !GetMMUMode() ) { p0 = (Atom*)((long)p0 & Lo3B); pR = (Atom*)((long)pR & Lo3B); } if( pR < p0 ) { q = (long)p0; p0 = pR; p2 = (Atom*)q; q = nL; nL = nR; nR = q; } #endif /* ----------------------------------------------------- */ /* ----------- */ /* Buffer? */ /* ----------- */ /* First, make use of buffer if can. This is faster in most cases. A notable exception is equal size case which is best done in situ (let drop through). Compare only the smaller size with buffer. If left is smaller, we can use post-increment addressing which is the faster mode. If right is smaller, use pre-decrement mode. We omit seeing if right-smaller will work with post-increment mode (if left also fits buffer). Preflighting overhead swallows us up very quickly. */ #if UseBuffer == 1 if( nL < nR ) { if( nL <= swapSize ) { CopyInc( swapPtr, p0, nL ); CopyInc( p0, p2, nR ); CopyInc( (Byte*)p0 + nR, swapPtr, nL ); return; } } else if( nL > nR ) { if( nR <= swapSize ) { CopyInc( swapPtr, p2, nR ); CopyDec( (Byte*)p0 + nR, p0, nL ); CopyInc( p0, swapPtr, nR ); return; } } #endif /* ----------- */ /* In Situ */ /* ----------- */ /* This algorithm always does the job, buffer or not. Find the smaller block. Swap it immediately into its final place. Now the larger block is in two out-of-order, but contiguous pieces. Wait a minute, this is what we started with! The only differences are: now the sizes are {smaller, larger - smaller}, and the start addresses have to keep up with the new pieces. We repeat until the two pieces were the same length. In other words, the final swap didn't break anybody in two. This can end with sizes larger than Atom-Atom. It depends on whether the smaller evenly divides the larger. */ done = false; do { pL = p0; pR = p2; if( nL < nR ) { nR = nR - nL; pR = (Atom*)((Byte*)pR + nR); nS = nL; } else if( nL > nR ) { p0 = (Atom*)((Byte*)pL + nR); nL = nL - nR; nS = nR; } else { nS = nL; done = true; } Swap; } while( !done ); }